* This file cleans and assembles the various data files used in the analysis
* It inputs the raw Geograin files, the Minneapolis Grain Exchange and Chicago 
* Mercantile Exchange (spot) data and outputs the combined Stata files used in the analysis.
* This file also creates the geospatial data used  in making the silo and rail network maps

clear all
set mem 4g

*** Setup file paths
** Aaron
*global path = "Aaron Path" 
** Jim
*global path = "Jim Path"
** Jon
cd "/Users/Jon/Dropbox/Rail and Oil"

*** Globals for harvest dates (defined by week harvest first reaches 90% complete 6-state average) 
* Wheat
global h2009w = date("9-27-2009", "MDY") 
global h2010w = date("10-3-2010", "MDY") 
global h2011w = date("9-18-2011", "MDY")
global h2012w = date("9-2-2012", "MDY") 
global h2013w = date("9-15-2013", "MDY") 
global h2014w = date("9-28-2014", "MDY") 
* Corn
global h2009c = date("12-13-2009", "MDY") 
global h2010c = date("10-31-2010", "MDY") 
global h2011c = date("11-13-2011", "MDY")
global h2012c = date("10-28-2012", "MDY") 
global h2013c = date("11-17-2013", "MDY") 
global h2014c = date("11-23-2014", "MDY") 
* Soybeans
global h2009s = date("11-22-2009", "MDY") 
global h2010s = date("10-24-2010", "MDY") 
global h2011s = date("11-6-2011", "MDY")
global h2012s = date("11-4-2012", "MDY") 
global h2013s = date("11-10-2013", "MDY") 
global h2014s = date("11-16-2014", "MDY") 

**** Process data on total rail traffic
*** For wheat spread regressions aggregate BNSF and CP
use "Data/MonthlyCarloads.dta", clear
* Keep only 2010 data and later
keep if WYEAR >= 2010
* Keep only RRs that transport oil from ND
gen keep = 0
replace keep = 1 if ORR_Alpha == "CPUS"
replace keep = 1 if ORR_Alpha == "BNSF"
keep if keep == 1
sort WYEAR WMO
collapse (sum) TotTraffic, by(WYEAR WMO)
rename WYEAR year
rename WMO month
sort year month
save "Data/BNSF&CPMonthlyCarloads.dta", replace
*** For corn and soy spread regressions aggregate BNSF, CP and UP
use "Data/MonthlyCarloads.dta", clear
keep if WYEAR >= 2010
gen keep = 0
replace keep = 1 if ORR_Alpha == "CPUS"
replace keep = 1 if ORR_Alpha == "BNSF"
replace keep = 1 if ORR_Alpha == "UP"
keep if keep == 1
sort WYEAR WMO
collapse (sum) TotTraffic, by(WYEAR WMO)
rename WYEAR year
rename WMO month
sort year month
save "Data/BNSF&CP&UPMonthlyCarloads.dta", replace

**** Make data for county centroid lat and long. to calc. distance
*** These data are from: 
insheet using "Data/Gaz_counties_national.txt", tab clear
rename intptlat latitude
rename intptlong longitude
rename geoid O_FIPS
sort O_FIPS
keep O_FIPS latitude longitude
save "Data/FIPSlatlong.dta", replace

**** Make Weather Data (Controls variables in main specifications)
*** These data are from: https://www.ncdc.noaa.gov/cdo-web/
insheet using "Data/ND_weather.csv", comma clear
keep station_name date tmin tmax
save "Data/Temp.dta", replace
insheet using "Data/IAND_weather.csv", comma clear
keep station_name date tmin tmax
save "Data/Temp2.dta", replace
insheet using "Data/MTSDMN_weather.csv", comma clear
keep station_name date tmin tmax
append using "Data/Temp.dta"
append using "Data/Temp2.dta"
gen O_ST = ""
replace O_ST = "MT" if station_name == "HELENA AIRPORT ASOS MT US"
replace O_ST = "MN" if station_name == "MINNEAPOLIS CRYSTAL AIRPORT MN US"
replace O_ST = "SD" if station_name == "PIERRE REGIONAL AIRPORT SD US"
replace O_ST = "ND" if station_name == "BISMARCK 5 NNW, ND US"
replace O_ST = "IA" if station_name == "DES MOINES INTERNATIONAL AIRPORT IA US"
replace O_ST = "NE" if station_name == "LINCOLN AIRPORT NE US"
gen temp = string(date, "%9.0f")
gen year = real(substr(temp,1,4))
gen month = real(substr(temp,5,2))
gen day = real(substr(temp,7,2))
sort O_ST year month
collapse (mean) tmin tmax (first) station_name, by(O_ST year month)
gen date = ym(year, month)
format date %tm
sort O_ST year month
save "Data/DailyTemps.dta", replace 

**** Process NASS annual crop production data
*** These data are from: https://quickstats.nass.usda.gov/
* Wheat
insheet using "Data/NASSWheatState.csv", comma clear
keep if period == "YEAR"
rename value wheat 
replace wheat = wheat/10^6
* Limit to states in our GeoGrain sample
gen keep = 0
replace keep = 1 if state == "NORTH DAKOTA"
replace keep = 1 if state == "SOUTH DAKOTA"
replace keep = 1 if state == "MONTANA"
replace keep = 1 if state == "MINNESOTA"
keep if keep == 1
replace state = "ND" if state == "NORTH DAKOTA"
replace state = "SD" if state == "SOUTH DAKOTA"
replace state = "MT" if state == "MONTANA"
replace state = "MN" if state == "MINNESOTA"
rename year cropyear
sort cropyear state
keep cropyear state wheat
save "Data/NASSWheatState.dta", replace
* Soy
insheet using "Data/NASSSoyState.csv", comma clear
keep if period == "YEAR"
rename value soy
replace soy = soy/10^6
* Limit to states in our GeoGrain sample
gen keep = 0
replace keep = 1 if state == "NORTH DAKOTA"
replace keep = 1 if state == "SOUTH DAKOTA"
replace keep = 1 if state == "MONTANA"
replace keep = 1 if state == "MINNESOTA"
replace keep = 1 if state == "NEBRASKA"
replace keep = 1 if state == "IOWA"
keep if keep == 1
replace state = "ND" if state == "NORTH DAKOTA"
replace state = "SD" if state == "SOUTH DAKOTA"
replace state = "MT" if state == "MONTANA"
replace state = "MN" if state == "MINNESOTA"
replace state = "NE" if state == "NEBRASKA"
replace state = "IA" if state == "IOWA"
rename year cropyear
sort cropyear state
keep cropyear state soy
save "Data/NASSSoyState.dta", replace
* Corn
insheet using "Data/NASSCornState.csv", comma clear
keep if period == "YEAR"
rename value corn
replace corn = corn/10^6
* Limit to states in our GeoGrain sample
gen keep = 0
replace keep = 1 if state == "NORTH DAKOTA"
replace keep = 1 if state == "SOUTH DAKOTA"
replace keep = 1 if state == "MONTANA"
replace keep = 1 if state == "MINNESOTA"
replace keep = 1 if state == "NEBRASKA"
replace keep = 1 if state == "IOWA"
keep if keep == 1
replace state = "ND" if state == "NORTH DAKOTA"
replace state = "SD" if state == "SOUTH DAKOTA"
replace state = "MT" if state == "MONTANA"
replace state = "MN" if state == "MINNESOTA"
replace state = "NE" if state == "NEBRASKA"
replace state = "IA" if state == "IOWA"
rename year cropyear
sort cropyear state
keep cropyear state corn
save "Data/NASSCornState.dta", replace

**** Clean diesel prices
*** These data are from: https://www.eia.gov/dnav/pet/pet_pri_gnd_dcus_r20_w.htm
* Monthly
use "Data/Diesel_prices.dta", clear
rename WeeklyMidwestNo2DieselRetai Pdiesel 
gen year = year(dofd(Date))
gen month = month(dofd(Date))
gen date = ym(year, month)
keep if year >= 2010 & year <= 2015
format date %tm
sort date 
collapse (mean) Pdiesel, by(date)
tsset date
save "Data/Diesel_clean.dta", replace
* Weekly
use "Data/Diesel_prices.dta", clear
rename WeeklyMidwestNo2DieselRetai Pdiesel 
gen year = year(dofd(Date))
gen week = week(dofd(Date))
gen date = yw(year, week)
keep if year >= 2010 & year <= 2015
format date %tw
sort date 
collapse (mean) Pdiesel, by(date)
tsset date
save "Data/Diesel_weekly_clean.dta", replace

**** Clean Minneapolis Spot Price Data
*** These data are from: https://www.marketnews.usda.gov/mnp/ls-report-config
* Open market info data
insheet using "Data/MN_Spot.csv", comma clear
* Grain board calls HRS "Dark Northern Spring"
keep if class == "DARK NORTHERN SPRING"
* Handle dates
split reportdate, p(/)
gen month = real(reportdate1)
gen day = real(reportdate2)
gen year = 2000+real(reportdate3)
gen date = mdy(month, day, year)
format date %td
* Keep relevant prices
sort date variety gradedescription transmode pricingpoint deliveryperiod
* Cash delivery period is all "Rail" shipments
* Cash is also pricing point "Mills-Processors via Chicago Rail Yards"
keep if deliveryperiod == "Cash"
* Keep relevant years
keep if year >= 2010
keep if year <= 2015
* Clean up and save
drop month day year reportdate1 reportdate2 reportdate3
sort date
collapse (mean) bid_h bid_l (first) reportdate location class variety units transmode pricingpoint deliveryperiod, by(date)
save "Data/MN_Spot", replace

*** Clean Chicago Soy Spot Price Data
** These data are from: https://www.marketnews.usda.gov/mnp/ls-report-config
* Open market info data
insheet using "Data/CHI_soy_Spot.csv", comma clear
* Handle dates
split reportdate, p(/)
gen month = real(reportdate1)
gen day = real(reportdate2)
gen year = 2000+real(reportdate3)
gen date = mdy(month, day, year)
format date %td
* Keep relevant prices
sort date transmode pricingpoint deliveryperiod
* Delivery period can be 15-Day or 30-Day, use only 15 for now
keep if deliveryperiod == "15 Day Delivery"
* Keep relevant years
keep if year >= 2010
keep if year <= 2015
* Clean up and save
drop month day year reportdate1 reportdate2 reportdate3
sort date
collapse (mean) bid_h bid_l (first) reportdate location class units transmode pricingpoint deliveryperiod, by(date)
rename bid_h bid_h_soy
rename bid_l bid_l_soy
save "Data/CHI_soy_Spot.dta", replace

*** Clean Chicago Corn Spot Price Data
** These data are from: https://www.marketnews.usda.gov/mnp/ls-report-config
* Open market info data
insheet using "Data/CHI_corn_Spot.csv", comma clear
* Handle dates
split reportdate, p(/)
gen month = real(reportdate1)
gen day = real(reportdate2)
gen year = 2000+real(reportdate3)
gen date = mdy(month, day, year)
format date %td
* Keep relevant prices
sort date transmode pricingpoint deliveryperiod
* Pricing point is Terminals Elevators or Mills and  Processors, keep Mills Processors
* for consistency with wheat
keep if pricingpoint == "Mills and Processors"
* Delivery period can be 15-Day or 30-Day, use only 15 
keep if deliveryperiod == "15 Day Delivery"
* Keep relevant years
keep if year >= 2010
keep if year <= 2015
* Clean up and save
drop month day year reportdate1 reportdate2 reportdate3
sort date
collapse (mean) bid_h bid_l (first) reportdate location class units transmode pricingpoint deliveryperiod, by(date)
rename bid_h bid_h_corn
rename bid_l bid_l_corn
save "Data/CHI_corn_Spot.dta", replace

*** Clean Geograin Data
* Definitions
* Futures constracts = LLYYYYM, LL = locations/exchange, YYYY = year, M = delivery period  
* Locations/exchanges C = Chicago/CBOT (corn), MW = Minneapolis/MGE (wheat), KW = Kansas City/KCBT (wheat), S = Chicago/CBOT (soy)
* Delivery period (M)
* March (H), May (K), July (N), September (U) & December (Z)
* Products
* HRS = "Hard, red, spring"
* HRW = "Hard, red, winter"
** Market (silo) information file
insheet using "Data/markets.csv", comma clear
sort marketid
* Calculate (distance as the crow flies) to Minneapolis and Chicago
geodist latitude longitude 44.9706756 -93.3315183, gen(MNdist) miles
geodist latitude longitude 41.8333925 -88.0123393, gen(CHIdist) miles
rename o_fips O_FIPS
save "Data/markets.dta", replace 
*** Clean wheat price data
use "Data/GeograinPrices_v11.dta", clear
* Covert price to price in $/bu from cents/bu 
replace price = price/100
* Merge in silo information
sort marketid
merge m:1 marketid using "/Users/Jon/Dropbox/Rail and Oil/Data/markets.dta", nogen
* Drop Texas observation (export port)
drop if marketid == 5590
* Drop Oregon observations (export terminal)
drop if state == "OR"
* Limit sample to HRS Wheat
keep if commodityid == 34
* Create time variables
gen year = year(date)
gen month = month(date)
* Save observations from all states for which we have data
* Save temporary file
save "Data/AllTemp.dta", replace
* File using only ND observations
keep if state == "ND"
* Save temporary file
save "Data/NDTemp.dta", replace
*** Clean corn price data
use "Data/GeograinPrices_v11.dta", clear
replace price = price/100
* Merge in silo information
sort marketid
merge m:1 marketid using "/Users/Jon/Dropbox/Rail and Oil/Data/markets.dta", nogen
* Drop Texas observation (export port)
drop if marketid == 5590
* Drop Oregon observations (export terminal)
drop if state == "OR"
* Limit sample to corn
keep if commodityid == 4
* Create time variables
gen year = year(date)
gen month = month(date)
* Save observations from all states
* Save temporary file
save "Data/AllTempCorn.dta", replace
*** Clean soy price data
use "Data/GeograinPrices_v11.dta", clear
replace price = price/100
* Merge in silo information
sort marketid
merge m:1 marketid using "/Users/Jon/Dropbox/Rail and Oil/Data/markets.dta", nogen
* Drop Texas observation (export port)
drop if marketid == 5590
* Drop Oregon observations (export terminal)
drop if state == "OR"
* Limit sample to soybeans
keep if commodityid == 17
* Create time variables
gen year = year(date)
gen month = month(date)
* Save observations from all states
* Save temporary file
save "Data/AllTempSoy.dta", replace
*** Make file for wheat silo locations in ND, MN, SD and MT
use "Data/AllTemp.dta", clear
keep marketid state city company zip fip latitude longitude
duplicates drop
sort marketid
save "Data/AllWheatSilos.dta", replace

*** Aggregations/averages of GeoGrain data
** Make a Monthly Average Version of the Price Data 
use "Data/NDTemp.dta", clear
* Keep relevant years
keep if year >= 2010 & year <= 2015
* Focus on cash prices one month prior to delivery (1-month horrizon, h=1)
gen keep = 0
replace keep = 1 if month == delmonth-1
keep if keep == 1
* Futures contract delivery months are March, May, July, September and December
keep if delmonth == 3 | delmonth == 5 | delmonth == 7 | delmonth == 9 | delmonth == 12
* Calculate mean price and basis during 1-month horizon (monthly frequence to match STB rail car data)
sort marketid year month
collapse (mean) price basis delmonth (first) state city fip zip latitude longitude, by(marketid year month)
* Create indicators variables for quintiles of implied basis discount/transportation cost
gen temp = basis*-1
xtile basis_quint = temp, nq(5)
* Save file
save "Data/NDMonthly.dta", replace
** Create month-by-year files used in maps
foreach y in 2010 2011 2012 2013 2014 2015 {
foreach m in 3 5 7 9 12 {
use "Data/NDMonthly.dta", clear
qui: keep if year == `y'
qui: keep if delmonth == `m'
qui: global obs = _N+5
qui: set obs $obs
foreach j in 5 4 3 2 1 {
qui: replace marketid = 0001 in $obs
qui: replace basis_quint = `j' in $obs
qui: replace latitude = latitude[1] in $obs
qui: replace longitude = longitude[1] in $obs
global obs = $obs-1
}
sort marketid
qui: save "Data/ND`y'm`m'.dta", replace
}
}

*** Use GeoGrain data to create carry measures for wheat
** Open data for all silos for which we have data
use "Data/AllTemp.dta", clear
* Keep relevant years
keep if year >= 2012 & year <= 2015
* Average cash prices by transaction date and delivery month
sort date marketid delmonth
collapse (mean) price, by(date marketid delmonth)
reshape wide price, i( marketid date) j( delmonth)
gen month = month(date)
gen year = year(date)
* Calculate carry
* 1-month carry
gen carry1mo = 0
replace carry1mo = price2 - price0 if month == 1
replace carry1mo = price3 - price0 if month == 2
replace carry1mo = price4 - price0 if month == 3
replace carry1mo = price5 - price0 if month == 4
replace carry1mo = price6 - price0 if month == 5
replace carry1mo = price7 - price0 if month == 6
replace carry1mo = price8 - price0 if month == 7
replace carry1mo = price9 - price0 if month == 8
replace carry1mo = price10 - price0 if month == 9
replace carry1mo = price11 - price0 if month == 10
replace carry1mo = price12 - price0 if month == 11
replace carry1mo = price1 - price0 if month == 12
gen carry3mo = 0
replace carry3mo = price4 - price0 if month == 1
replace carry3mo = price5 - price0 if month == 2
replace carry3mo = price6 - price0 if month == 3
replace carry3mo = price7 - price0 if month == 4
replace carry3mo = price8 - price0 if month == 5
replace carry3mo = price9 - price0 if month == 6
replace carry3mo = price10 - price0 if month == 7
replace carry3mo = price11 - price0 if month == 8
replace carry3mo = price12 - price0 if month == 9
replace carry3mo = price1 - price0 if month == 10
replace carry3mo = price2 - price0 if month == 11
replace carry3mo = price3 - price0 if month == 12
gen carry6mo = 0 
replace carry6mo = price7 - price0 if month == 1
replace carry6mo = price8 - price0 if month == 2
replace carry6mo = price9 - price0 if month == 3
replace carry6mo = price10 - price0 if month == 4
replace carry6mo = price11 - price0 if month == 5
replace carry6mo = price12 - price0 if month == 6
replace carry6mo = price1 - price0 if month == 7
replace carry6mo = price2 - price0 if month == 8
replace carry6mo = price3 - price0 if month == 9
replace carry6mo = price4 - price0 if month == 10
replace carry6mo = price5 - price0 if month == 11
replace carry6mo = price6 - price0 if month == 12
sort marketid year month
order marketid date carry1mo carry3mo carry6mo
* Collapse to monthly to match rail data
sort year month
collapse (mean) carry1mo carry3mo carry6mo (count) count1=carry1mo count3=carry3mo count6=carry6mo, by(marketid year month)
gen date = ym(year,month)
format date %tm
sort marketid 
merge m:1 marketid using "/Users/Jon/Dropbox/Rail and Oil/Data/markets.dta", nogen
* Merge in temperature data
gen O_ST = state
sort O_ST year month
merge  m:1 O_ST year month using "Data/DailyTemps.dta", nogen
* Merge in rail traffic data
sort year month
* Use the version with CP and BNSF data
merge m:1 year month  using "Data/BNSF&CPMonthlyCarloads.dta", nogen
* Convert 1, 3 and 6 month carry to dollars per month
replace carry3mo = carry3mo/3
replace carry6mo = carry6mo/6
save "/Users/Jon/Dropbox/rail and oil/Data/Maps/MonthlyCarry.dta", replace

*** Use GeoGrain data to create carry measures for corn
** Open data for all silos for which we have data
use "Data/AllTempCorn.dta", clear
* Keep relevant years
keep if year >= 2010 & year <= 2015
* Average cash prices by transaction date and delivery month
sort date marketid delmonth
collapse (mean) price, by(date marketid delmonth)
reshape wide price, i( marketid date) j( delmonth)
gen month = month(date)
gen year = year(date)
* Calculate carry
* 1-month carry
gen carry1mo = 0
replace carry1mo = price2 - price0 if month == 1
replace carry1mo = price3 - price0 if month == 2
replace carry1mo = price4 - price0 if month == 3
replace carry1mo = price5 - price0 if month == 4
replace carry1mo = price6 - price0 if month == 5
replace carry1mo = price7 - price0 if month == 6
replace carry1mo = price8 - price0 if month == 7
replace carry1mo = price9 - price0 if month == 8
replace carry1mo = price10 - price0 if month == 9
replace carry1mo = price11 - price0 if month == 10
replace carry1mo = price12 - price0 if month == 11
replace carry1mo = price1 - price0 if month == 12
gen carry3mo = 0
replace carry3mo = price4 - price0 if month == 1
replace carry3mo = price5 - price0 if month == 2
replace carry3mo = price6 - price0 if month == 3
replace carry3mo = price7 - price0 if month == 4
replace carry3mo = price8 - price0 if month == 5
replace carry3mo = price9 - price0 if month == 6
replace carry3mo = price10 - price0 if month == 7
replace carry3mo = price11 - price0 if month == 8
replace carry3mo = price12 - price0 if month == 9
replace carry3mo = price1 - price0 if month == 10
replace carry3mo = price2 - price0 if month == 11
replace carry3mo = price3 - price0 if month == 12
gen carry6mo = 0 
replace carry6mo = price7 - price0 if month == 1
replace carry6mo = price8 - price0 if month == 2
replace carry6mo = price9 - price0 if month == 3
replace carry6mo = price10 - price0 if month == 4
replace carry6mo = price11 - price0 if month == 5
replace carry6mo = price12 - price0 if month == 6
replace carry6mo = price1 - price0 if month == 7
replace carry6mo = price2 - price0 if month == 8
replace carry6mo = price3 - price0 if month == 9
replace carry6mo = price4 - price0 if month == 10
replace carry6mo = price5 - price0 if month == 11
replace carry6mo = price6 - price0 if month == 12
sort marketid year month
order marketid date carry1mo carry3mo carry6mo
* Collapse to monthly to match rail data
sort year month
collapse (mean) carry1mo carry3mo carry6mo (count) count1=carry1mo count3=carry3mo count6=carry6mo, by(marketid year month)
gen date = ym(year,month)
format date %tm
sort marketid 
merge m:1 marketid using "/Users/Jon/Dropbox/Rail and Oil/Data/markets.dta", nogen
* Merge in temperature data
gen O_ST = state
sort O_ST year month
merge  m:1 O_ST year month using "Data/DailyTemps.dta", nogen
* Merge in rail traffic data
sort year month
* Use the version with CP and BNSF data
merge m:1 year month  using "Data/BNSF&CP&UPMonthlyCarloads.dta", nogen
* Convert 1, 3 and 6 month carry to dollars per month
replace carry3mo = carry3mo/3
replace carry6mo = carry6mo/6
save "/Users/Jon/Dropbox/rail and oil/Data/Maps/CornMonthlyCarry.dta", replace

*** Use GeoGrain data to create carry measures for soybeans
use "Data/AllTempSoy.dta", clear
* Keep relevant years
keep if year >= 2010 & year <= 2015
* Average cash prices by transaction date and delivery month
sort date marketid delmonth
collapse (mean) price, by(date marketid delmonth)
reshape wide price, i( marketid date) j( delmonth)
gen month = month(date)
gen year = year(date)
* Calculate carry
* 1-month carry
gen carry1mo = 0
replace carry1mo = price2 - price0 if month == 1
replace carry1mo = price3 - price0 if month == 2
replace carry1mo = price4 - price0 if month == 3
replace carry1mo = price5 - price0 if month == 4
replace carry1mo = price6 - price0 if month == 5
replace carry1mo = price7 - price0 if month == 6
replace carry1mo = price8 - price0 if month == 7
replace carry1mo = price9 - price0 if month == 8
replace carry1mo = price10 - price0 if month == 9
replace carry1mo = price11 - price0 if month == 10
replace carry1mo = price12 - price0 if month == 11
replace carry1mo = price1 - price0 if month == 12
gen carry3mo = 0
replace carry3mo = price4 - price0 if month == 1
replace carry3mo = price5 - price0 if month == 2
replace carry3mo = price6 - price0 if month == 3
replace carry3mo = price7 - price0 if month == 4
replace carry3mo = price8 - price0 if month == 5
replace carry3mo = price9 - price0 if month == 6
replace carry3mo = price10 - price0 if month == 7
replace carry3mo = price11 - price0 if month == 8
replace carry3mo = price12 - price0 if month == 9
replace carry3mo = price1 - price0 if month == 10
replace carry3mo = price2 - price0 if month == 11
replace carry3mo = price3 - price0 if month == 12
gen carry6mo = 0 
replace carry6mo = price7 - price0 if month == 1
replace carry6mo = price8 - price0 if month == 2
replace carry6mo = price9 - price0 if month == 3
replace carry6mo = price10 - price0 if month == 4
replace carry6mo = price11 - price0 if month == 5
replace carry6mo = price12 - price0 if month == 6
replace carry6mo = price1 - price0 if month == 7
replace carry6mo = price2 - price0 if month == 8
replace carry6mo = price3 - price0 if month == 9
replace carry6mo = price4 - price0 if month == 10
replace carry6mo = price5 - price0 if month == 11
replace carry6mo = price6 - price0 if month == 12
sort marketid year month
order marketid date carry1mo carry3mo carry6mo
* Collapse to monthly to match rail data
sort year month
collapse (mean) carry1mo carry3mo carry6mo (count) count1=carry1mo count3=carry3mo count6=carry6mo, by(marketid year month)
gen date = ym(year,month)
format date %tm
sort marketid 
merge m:1 marketid using "/Users/Jon/Dropbox/Rail and Oil/Data/markets.dta", nogen
* Merge in temperature data
gen O_ST = state
sort O_ST year month
merge  m:1 O_ST year month using "Data/DailyTemps.dta", nogen
* Merge in rail traffic data
sort year month
* Use the version with CP and BNSF data
merge m:1 year month  using "Data/BNSF&CP&UPMonthlyCarloads.dta", nogen
* Convert 1, 3 and 6 month carrs to dollars per month
replace carry3mo = carry3mo/3
replace carry6mo = carry6mo/6
save "/Users/Jon/Dropbox/rail and oil/Data/Maps/SoyMonthlyCarry.dta", replace

**** Create combined files merging the GeoGrain wheat and Minneapolis market hub spot data
** Use all silos for which we have data
use "Data/AllTemp.dta", clear
* Keep relevant years
keep if year >= 2010 & year <= 2015
* Merge in Minneapolis spot market data (wheat)
sort date
merge m:1 date using "Data/MN_Spot", nogen
* Calculate and average spot price as the average of the daily "high" and "low" prices
gen AvgMNspot = (bid_h+bid_l)/2
* Keep only GeoGrain spot prices
keep if delmonth == 0
* Calculate the spread between Minneapolis spot price and the GeoGrain silo cash price (spot)
gen spread = AvgMNspot-price
* Save a temporary file used to construct monthly and weekly average files
save "Data/AllSilos.dta", replace
* Make weekly average file (by market/silo)
use "Data/AllSilos.dta", clear
gen week = week(date)
#delimit;
sort marketid year week;
collapse (mean) price bid_h bid_l AvgMNspot spread (first) state city company O_FIPS MNdist 
zip fip latitude longitude class units variety transmode pricingpoint deliveryperiod,
by(marketid year week);
#delimit cr
save "Data/MktMNSpreadWeeklyAllSilos.dta", replace
*generate weekly average version
collapse (mean) spread, by(year week)
save "Data/MktMNSpreadWeeklyAllSilosAVG.dta", replace

* Make Monthly average file (by market/silo)
use "Data/AllSilos.dta", clear
*gen month = month(date)
#delimit;
sort marketid year month;
collapse (mean) price bid_h bid_l AvgMNspot spread (first) state city company O_FIPS MNdist
zip fip latitude longitude class units variety transmode pricingpoint deliveryperiod,
by(marketid year month);
#delimit cr
* Merge in temperature data
gen O_ST = state
sort O_ST year month
merge  m:1 O_ST year month using "Data/DailyTemps.dta", nogen
* Merge in rail traffic data
sort year month
* Use the version with CP and BNSF data
merge m:1 year month  using "Data/BNSF&CPMonthlyCarloads.dta", nogen
save "Data/MktMNSpreadMonthlyAllSilos.dta", replace
sort year month
collapse (mean) price spread AvgMNspot MNdist, by(year month)
gen date = ym(year, month)
format date %tm
save "Data/MktMNSpreadMonthlyAvg.dta", replace
** Average file (all years by market/silo)
use "Data/AllSilos.dta", clear
* Create silo-level average spread
sort marketid
by marketid: egen meanspread = mean(spread)
* Create change in spread relative to mean spread
gen chngspread = spread-meanspread
#delimit ;
sort marketid year;
collapse (mean) price bid_h bid_l AvgMNspot spread meanspread chngspread (first) state city company O_FIPS MNdist
zip fip latitude longitude class units variety transmode pricingpoint deliveryperiod,
by(marketid year month);
#delimit cr
xtile spread_quint = spread, nq(5)
xtile meanspread_quint = meanspread, nq(5)
xtile chngspread_quint = chngspread, nq(5)
xtile price_quint = -1*price, nq(5)
pctile PCranges = spread, nq(5)
sum spread if spread_quint == 1
sum spread if spread_quint == 2
sum spread if spread_quint == 3
sum spread if spread_quint == 4
sum spread if spread_quint == 5
tabstat chngspread, by(chngspread_quint)
save "Data/MktMNSpreadMonthlyAllSilos.dta", replace
sort year month
collapse (mean) price spread meanspread chngspread AvgMNspot MNdist, by(year month)
gen date = ym(year, month)
format date %tm
save "Data/MktMNSpreadMonthlyAvg.dta", replace

*** Create combined files merging the GeoGrain corn and Chicago market hub spot data
use "Data/AllTempCorn.dta", clear
* Keep relevant years
keep if year >= 2010 & year <= 2015
* Merge in Chicago corn spot market data
sort date
merge m:1 date using "Data/Chi_corn_Spot", nogen
* Calculate and average spot price as the average of the daily "high" and "low" prices
gen AvgCornSpot = (bid_h+bid_l)/2
* Keep only GeoGrain spot prices
keep if delmonth == 0
* Calculate the spread between Chicago spot price and the GeoGrain silo cash price (spot)
gen spread = AvgCornSpot-price
* Save a temporary file used to construct monthly and weekly average files
save "Data/AllSilosCorn.dta", replace
* Make weekly average file (by market/silo)
use "Data/AllSilosCorn.dta", clear
gen week = week(date)
#delimit;
sort marketid year week;
collapse (mean) price bid_h bid_l AvgCornSpot spread (first) state city company O_FIPS CHIdist
zip fip latitude longitude class units transmode pricingpoint deliveryperiod,
by(marketid year week);
#delimit cr
save "Data/MktCornSpreadWeeklyAllSilos.dta", replace
* Make Monthly average file (by market/silo)
use "Data/AllSilosCorn.dta", clear
#delimit;
sort marketid year month;
collapse (mean) price bid_h bid_l AvgCornSpot spread (first) state city company O_FIPS CHIdist
zip fip latitude longitude class units transmode pricingpoint deliveryperiod,
by(marketid year month);
#delimit cr
* Merge in temperature data
gen O_ST = state
sort O_ST year month
merge  m:1 O_ST year month using "Data/DailyTemps.dta", nogen
* Merge in rail traffic data
sort year month
* Use the version with CP, UP and BNSF data
merge m:1 year month  using "Data/BNSF&CP&UPMonthlyCarloads.dta", nogen
save "Data/MktCornSpreadMonthlyAllSilos.dta", replace

*** Create combined files merging the GeoGrain soy and Chicago market hub spot data
use "Data/AllTempSoy.dta", clear
* Keep relevant years
keep if year >= 2010 & year <= 2015
* Merge in Chicago corn spot market data
sort date
merge m:1 date using "Data/Chi_soy_Spot", nogen
* Calculate and average spot price as the average of the daily "high" and "low" prices
gen AvgSoySpot = (bid_h+bid_l)/2
* Keep only GeoGrain spot prices
keep if delmonth == 0
* Calculate the spread between Chicago spot price and the GeoGrain silo cash price (spot)
gen spread = AvgSoySpot-price
* Save a temporary file used to construct monthly and weekly average files
save "Data/AllSilosSoy.dta", replace
* Make weekly average file (by market/silo)
use "Data/AllSilosSoy.dta", clear
gen week = week(date)
#delimit;
sort marketid year week;
collapse (mean) price bid_h bid_l AvgSoySpot spread (first) state city company O_FIPS CHIdist
zip fip latitude longitude class units transmode pricingpoint deliveryperiod,
by(marketid year week);
#delimit cr
save "Data/MktSoySpreadWeeklyAllSilos.dta", replace
* Make Monthly average file (by market/silo)
use "Data/AllSilosSoy.dta", clear
#delimit;
sort marketid year month;
collapse (mean) price bid_h bid_l AvgSoySpot spread (first) state city company O_FIPS CHIdist
zip fip latitude longitude class units transmode pricingpoint deliveryperiod,
by(marketid year month);
#delimit cr
* Merge in temperature data
gen O_ST = state
sort O_ST year month
merge  m:1 O_ST year month using "Data/DailyTemps.dta", nogen
* Merge in rail traffic data
sort year month
* Use the version with CP, UP and BNSF data
merge m:1 year month  using "Data/BNSF&CP&UPMonthlyCarloads.dta", nogen
save "Data/MktSoySpreadMonthlyAllSilos.dta", replace

*** Make files for maps
** ND map files
* These geospatial data come from: The ND state GIS office (Jan 25, 2017)
* Make state boundary shapefile
#delimit;
shp2dta using "Data/Maps/NDHUB_STATE_polygon.shp", 
data("Data/Maps/ND_d") coor("Data/Maps/ND_c") genid(_ID) replace;
#delimit cr
#delimit;
** ND Railroads
* These geospatial data come from: The ND state GIS office (Jan 25, 2017)
shp2dta using "Data/Maps/NDHUB_RAILROADS_line.shp", 
data("Data/Maps/RR_d") coor("Data/Maps/RR_c") genid(_ID) replace;
#delimit cr
** US map files
* Make US state boundary files
* These geospatial data come from Jon's web page
#delimit;
shp2dta using "Data/Maps/cb_2016_us_state_500k.shp", 
data("Data/Maps/States_d") coor("Data/Maps/States_c") genid(_ID) replace;
#delimit cr

*** Process rail network maps to get ownership and locations
** Clean network data RR data
use "Data/Maps/RR_d.dta", clear
* Update rail ownership information
split RAIL_ID
gen RR = "Other"
replace RR = "BNSF" if RAIL_ID1 == "BN"
replace RR = "BNSF" if RAIL_ID1 == "BURLINGTON"
replace RR = "BNSF" if RAIL_ID1 == "GN"
replace RR = "BNSF" if RAIL_ID1 == "GREAT"
replace RR = "CP" if RAIL_ID1 == "CANADIAN"
replace RR = "CP" if RAIL_ID1 == "CP"
replace RR = "CP" if RAIL_ID1 == "SOO"
replace RR = "RRVW" if RAIL_ID1 == "RED"
* Save ownership information file
keep _ID RR ABAND_YR
duplicates drop
sort _ID
save "/Users/Jon/Dropbox/rail and oil/Data/Maps/RR_info.dta", replace
** Merge rail owndership into coordinate file
use "/Users/Jon/Dropbox/rail and oil/Data/Maps/RR_c.dta"
merge m:1 _ID using "/Users/Jon/Dropbox/rail and oil/Data/Maps/RR_info.dta", nogen
duplicates drop
* Save appended coordinate file
save "/Users/Jon/Dropbox/rail and oil/Data/Maps/RR_c_clean.dta", replace
** Create a file containing locations of oil terminals (Genscape) and grain silos (Geograin)
use "Data/Maps/NDAvg2014.dta", clear
keep marketid latitude longitude
sort marketid
merge 1:1 marketid using "Data/GrainElevatorRR.dta", nogen
drop rr2
rename rr1 rr
gen type = "GeoGrain"
append using "Data/Maps/GenscapeTerminals.dta"
replace rr = "CPUS" if rr == "CP"
replace type = "Genscape" if terminal != ""
egen terminalid = group(terminal)
gen id = marketid
replace id = terminalid if type == "Genscape" 
replace longitude = x if type == "Genscape" 
replace latitude = y if type == "Genscape"
egen typeid = group(type)
keep id latitude longitude marketid terminalid rr_id route rr terminal type typeid
label define types 1 "Oil Terminal" 2 "Grain Market"
label values typeid types
save  "Data/Maps/locations.dta", replace

*** Create monthly average spread files used in maps
* Wheat
foreach y in 2012 2013 2014 2015 {
foreach m in 1 2 3 4 5 6 7 8 9 10 11 12 {
qui: use "Data/MktMNSpreadMonthlyAllSilos.dta", clear
qui: keep if year == `y'
qui: keep if month == `m'
qui: global obs = _N+5
qui: set obs $obs
foreach j in  6 5 4 3 2 1 {
qui: replace marketid = 0001 in $obs
qui: replace spread_quint = `j' in $obs
qui: replace meanspread_quint = `j' in $obs
qui: replace chngspread_quint = `j' in $obs
qui: replace price_quint = `j' in $obs
qui: replace latitude = latitude[1] in $obs
qui: replace longitude = longitude[1] in $obs
qui: replace state = "ND" in $obs
qui: replace year = `y' in $obs
qui: replace month = `m' in $obs
global obs = $obs-1
}
qui: save "Data/MktMNSpreadMonthly`y'm`m'.dta", replace
}
}

*** Make a version of the data that shifts everything to "harvest time"
*** Wheat
** Create daily versions of oil cars, total traffic, weather and diesel prices
* Oil cars
use "Data/CrudeOilGrain.dta", clear
* Relvant years
keep if WYEAR >= 2010
keep if WYEAR <= 2015
* ND shipments
keep if O_ST == "ND"
* Akshaya's definitions for oil
replace STCC5_Title = "Oil" if STCC5 == 13111 | STCC5 == 49101 | STCC5 == 49151
* Export temp file with Oil cars
keep if STCC5_Title == "Oil"
gen date = mdy(WMO, WDAY, WYEAR)
format date %td
* Convert to 1000s of cars
replace Cars = Cars/1000
rename Cars oilcars_stb
keep date WYEAR WMO oilcars_stb
*** Create variable for crop years
gen year = WYEAR
gen month = WMO
gen cropyear = 0
replace cropyear = 2009 if year == 2010 & month <= 8
replace cropyear = 2010 if year == 2010 & month >= 9
replace cropyear = 2010 if year == 2011 & month <= 8
replace cropyear = 2011 if year == 2011 & month >= 9
replace cropyear = 2011 if year == 2012 & month <= 8
replace cropyear = 2012 if year == 2012 & month >= 9
replace cropyear = 2012 if year == 2013 & month <= 8
replace cropyear = 2013 if year == 2013 & month >= 9
replace cropyear = 2013 if year == 2014 & month <= 8
replace cropyear = 2014 if year == 2014 & month >= 9
replace cropyear = 2014 if year == 2015 & month <= 8
replace cropyear = 2015 if year == 2015 & month >= 9
* Loop over years and assign post harvest
gen m = .
foreach y in 2009 2010 2011 2012 2013 2014 {
foreach m in  1 2 3 4 5 6 7 8 9 10 11 12 13 {
*foreach y in 2010  {
*foreach m in 1  {
if `y' == 2009 {
local harvest = $h2009w
} 
if `y' == 2010 {
local harvest = $h2010w
} 
if `y' == 2011 {
local harvest = $h2011w
}
if `y' == 2012 {
local harvest = $h2012w
}
if `y' == 2013 {
local harvest = $h2013w
}
if `y' == 2014 {
local harvest = $h2014w
}
replace m = `m' if date > `harvest'+(`m'-1)*28 & date <= `harvest'+(`m')*28 & cropyear == `y' 
}
}
sort cropyear m
drop if m == .
collapse (sum) oilcars_stb, by(cropyear m)
save "Data/TempOilCarsCropYearMonth_Wheat.dta", replace
* Total traffic
use "/Users/Jon/Dropbox/Rail and Oil/Data/DailyCarloads.dta", clear
keep if ORR_Alpha == "BNSF" | ORR_Alpha == "CPUS"
keep if WYEAR >= 2010
keep if WYEAR <= 2015
sort WYEAR WMO WDAY
collapse (sum) TotTraffic, by(WYEAR WMO WDAY)
save "/Users/Jon/Dropbox/Rail and Oil/Data/DailyCarloadsBNSF&CP.dta", replace
gen date = mdy(WMO, WDAY, WYEAR)
*** Create variable for crop years
gen year = WYEAR
gen month = WMO
gen cropyear = 0
replace cropyear = 2009 if year == 2010 & month <= 8
replace cropyear = 2010 if year == 2010 & month >= 9
replace cropyear = 2010 if year == 2011 & month <= 8
replace cropyear = 2011 if year == 2011 & month >= 9
replace cropyear = 2011 if year == 2012 & month <= 8
replace cropyear = 2012 if year == 2012 & month >= 9
replace cropyear = 2012 if year == 2013 & month <= 8
replace cropyear = 2013 if year == 2013 & month >= 9
replace cropyear = 2013 if year == 2014 & month <= 8
replace cropyear = 2014 if year == 2014 & month >= 9
replace cropyear = 2014 if year == 2015 & month <= 8
replace cropyear = 2015 if year == 2015 & month >= 9
* Loop over years and assign post harvest
gen m = .
foreach y in 2009 2010 2011 2012 2013 2014 {
foreach m in  1 2 3 4 5 6 7 8 9 10 11 12 13 {
*foreach y in 2010  {
*foreach m in 1  {
if `y' == 2009 {
local harvest = $h2009w
} 
if `y' == 2010 {
local harvest = $h2010w
} 
if `y' == 2011 {
local harvest = $h2011w
}
if `y' == 2012 {
local harvest = $h2012w
}
if `y' == 2013 {
local harvest = $h2013w
}
if `y' == 2014 {
local harvest = $h2014w
}
replace m = `m' if date > `harvest'+(`m'-1)*28 & date <= `harvest'+(`m')*28 & cropyear == `y' 
}
}
sort cropyear m
drop if m == .
collapse (sum) TotTraffic, by(cropyear m)
save "Data/TotTrafficCropYearMonth_Wheat.dta", replace
* Temperature
insheet using "Data/ND_weather.csv", comma clear
keep station_name date tmin tmax
save "Data/Temp.dta", replace
insheet using "Data/IAND_weather.csv", comma clear
keep station_name date tmin tmax
save "Data/Temp2.dta", replace
insheet using "Data/MTSDMN_weather.csv", comma clear
keep station_name date tmin tmax
append using "Data/Temp.dta"
append using "Data/Temp2.dta"
gen O_ST = ""
replace O_ST = "MT" if station_name == "HELENA AIRPORT ASOS MT US"
replace O_ST = "MN" if station_name == "MINNEAPOLIS CRYSTAL AIRPORT MN US"
replace O_ST = "SD" if station_name == "PIERRE REGIONAL AIRPORT SD US"
replace O_ST = "ND" if station_name == "BISMARCK 5 NNW, ND US"
replace O_ST = "IA" if station_name == "DES MOINES INTERNATIONAL AIRPORT IA US"
replace O_ST = "NE" if station_name == "LINCOLN AIRPORT NE US"
* Keep ND, SD, MN, MT
gen keep = 0
replace keep = 1 if O_ST == "ND"
replace keep = 1 if O_ST == "SD"
replace keep = 1 if O_ST == "MT"
replace keep = 1 if O_ST == "MN"
keep if keep == 1
gen temp = string(date, "%9.0f")
gen year = real(substr(temp,1,4))
gen month = real(substr(temp,5,2))
gen day = real(substr(temp,7,2))
drop date
gen date = mdy(month, day, year)
format date %td
gen cropyear = .
replace cropyear = 2009 if year == 2010 & month <= 8
replace cropyear = 2010 if year == 2010 & month >= 9
replace cropyear = 2010 if year == 2011 & month <= 8
replace cropyear = 2011 if year == 2011 & month >= 9
replace cropyear = 2011 if year == 2012 & month <= 8
replace cropyear = 2012 if year == 2012 & month >= 9
replace cropyear = 2012 if year == 2013 & month <= 8
replace cropyear = 2013 if year == 2013 & month >= 9
replace cropyear = 2013 if year == 2014 & month <= 8
replace cropyear = 2014 if year == 2014 & month >= 9
replace cropyear = 2014 if year == 2015 & month <= 8
replace cropyear = 2015 if year == 2015 & month >= 9
* Loop over years and assign post harvest
gen m = .
foreach y in 2009 2010 2011 2012 2013 2014 {
foreach m in  1 2 3 4 5 6 7 8 9 10 11 12 13 {
*foreach y in 2010  {
*foreach m in 1  {
if `y' == 2009 {
local harvest = $h2009w
} 
if `y' == 2010 {
local harvest = $h2010w
} 
if `y' == 2011 {
local harvest = $h2011w
}
if `y' == 2012 {
local harvest = $h2012w
}
if `y' == 2013 {
local harvest = $h2013w
}
if `y' == 2014 {
local harvest = $h2014w
}
replace m = `m' if date > `harvest'+(`m'-1)*28 & date <= `harvest'+(`m')*28 & cropyear == `y' 
}
}
sort O_ST cropyear m
drop if m == .
collapse (mean) tmax tmin, by(O_ST cropyear m)
save "Data/WeatherCropYear_Wheat.dta", replace
* Diesel prices (weekly)
use "Data/Diesel_prices.dta", clear
gen WYEAR = year(Date)
gen WWEEK = week(Date)
gen WMO = month(Date)
gen month = WMO
sort WYEAR WWEEK
rename WeeklyMidwestNo2DieselRetai Pdiesel
rename Date date
keep if WYEAR >= 2010
keep if WYEAR <= 2015
gen year = WYEAR
* Create crop year variable
gen cropyear = .
replace cropyear = 2009 if year == 2010 & month <= 8
replace cropyear = 2010 if year == 2010 & month >= 9
replace cropyear = 2010 if year == 2011 & month <= 8
replace cropyear = 2011 if year == 2011 & month >= 9
replace cropyear = 2011 if year == 2012 & month <= 8
replace cropyear = 2012 if year == 2012 & month >= 9
replace cropyear = 2012 if year == 2013 & month <= 8
replace cropyear = 2013 if year == 2013 & month >= 9
replace cropyear = 2013 if year == 2014 & month <= 8
replace cropyear = 2014 if year == 2014 & month >= 9
replace cropyear = 2014 if year == 2015 & month <= 8
replace cropyear = 2015 if year == 2015 & month >= 9
* Loop over years and assign most post harvest
gen m = .
foreach y in 2009 2010 2011 2012 2013 2014 {
foreach m in  1 2 3 4 5 6 7 8 9 10 11 12 13 {
*foreach y in 2010  {
*foreach m in 1  {
if `y' == 2009 {
local harvest = $h2009w
} 
if `y' == 2010 {
local harvest = $h2010w
} 
if `y' == 2011 {
local harvest = $h2011w
}
if `y' == 2012 {
local harvest = $h2012w
}
if `y' == 2013 {
local harvest = $h2013w
}
if `y' == 2014 {
local harvest = $h2014w
}
replace m = `m' if date > `harvest'+(`m'-1)*28 & date <= `harvest'+(`m')*28 & cropyear == `y' 
}
}
sort cropyear m
drop if m == .
collapse (mean) Pdiesel, by(cropyear m)
save "Data/DieselCropYear_Wheat.dta", replace
** Combine with wheat data for treated states
use "Data/CrudeOilGrain.dta", clear
gen date = mdy(WMO, WDAY, WYEAR)
format date %td
order date
** Focus on wheat
keep if STCC5 == 1137
* Restrict to years for which we have data
keep if WYEAR >= 2010
keep if WYEAR <= 2015
* Keep ND, SD, MN, MT
gen keep = 0
replace keep = 1 if O_ST == "ND"
replace keep = 1 if O_ST == "SD"
replace keep = 1 if O_ST == "MT"
replace keep = 1 if O_ST == "MN"
keep if keep == 1
* Make revenue per ton variable
gen rpt = revenue/(tons*33)
format date %td
*** Create variable for crop years
gen year = WYEAR
gen month = WMO
gen cropyear = 0
replace cropyear = 2009 if year == 2010 & month <= 8
replace cropyear = 2010 if year == 2010 & month >= 9
replace cropyear = 2010 if year == 2011 & month <= 8
replace cropyear = 2011 if year == 2011 & month >= 9
replace cropyear = 2011 if year == 2012 & month <= 8
replace cropyear = 2012 if year == 2012 & month >= 9
replace cropyear = 2012 if year == 2013 & month <= 8
replace cropyear = 2013 if year == 2013 & month >= 9
replace cropyear = 2013 if year == 2014 & month <= 8
replace cropyear = 2014 if year == 2014 & month >= 9
replace cropyear = 2014 if year == 2015 & month <= 8
replace cropyear = 2015 if year == 2015 & month >= 9
** Create harest date variables
gen harvest = .
replace harvest = $h2009w if cropyear == 2009
replace harvest = $h2010w if cropyear == 2010
replace harvest = $h2011w if cropyear == 2011
replace harvest = $h2012w if cropyear == 2012
replace harvest = $h2013w if cropyear == 2013
replace harvest = $h2014w if cropyear == 2014
* Loop over years and assign most post harvest
foreach m in  1 2 3 4 5 6 7 8 9 10 11 12 13 {
gen m`m' = 0
}
gen m = .
foreach y in 2009 2010 2011 2012 2013 2014 {
foreach m in  1 2 3 4 5 6 7 8 9 10 11 12 13 {
*foreach y in 2010  {
*foreach m in 1  {
if `y' == 2009 {
local harvest = $h2009w
} 
if `y' == 2010 {
local harvest = $h2010w
} 
if `y' == 2011 {
local harvest = $h2011w
}
if `y' == 2012 {
local harvest = $h2012w
}
if `y' == 2013 {
local harvest = $h2013w
}
if `y' == 2014 {
local harvest = $h2014w
}
replace m`m' = 1 if date > `harvest'+(`m'-1)*28 & date <= `harvest'+(`m')*28 & cropyear == `y' 
replace m = `m' if date > `harvest'+(`m'-1)*28 & date <= `harvest'+(`m')*28 & cropyear == `y' 
}
}
* Generate count var for whether a shipment occurs
gen Ship = U_Cars > 0
gen ShipMulti = U_Cars >= 24 & U_Cars < 47
gen ShipShuttle = U_Cars > 100
* Create variable for number of observations by county
sort O_FIPS
by O_FIPS: egen FIPSnumobs = count(O_FIPS) 
* Collapse to cropyear m, where m is harvest month
sort O_FIPS cropyear m
collapse (mean) artm rpt (sum) Ship ShipMulti ShipShuttle U_Cars (first) O_ST FIPSnumobs [iweight = Exp_Factor_Th], by(O_FIPS cropyear m)
* Merge in rail traffic data
sort cropyear m
merge m:1 cropyear m using "Data/TotTrafficCropYearMonth_Wheat.dta", nogen
* Merge in monthly oil cars
sort cropyear m
merge m:1 cropyear m using "Data/TempOilCarsCropYearMonth_Wheat.dta", nogen
* Merge in diesel prices
sort cropyear m
merge m:1 cropyear m using "Data/DieselCropYear_Wheat.dta", nogen
* Merge in weather
sort O_ST cropyear m
merge m:1 O_ST cropyear m using "Data/WeatherCropYear_Wheat.dta", nogen
drop if m == .
drop if O_FIPS == .
*** Merge in production (monthly from NASS survey)
sort O_FIPS cropyear
merge m:1 O_FIPS cropyear using "Data/CntyWheatProd.dta", 
drop if _merge == 2
drop _merge
*** Generate log variables
gen lproduction = ln(production+1)
gen lTotTraff = ln(TotTraffic)
gen lcars = ln(U_Cars+1)
gen loilcars = ln(oilcars_stb)
gen lartm = ln(artm)
gen lrpt = ln(rpt)
gen lpdiesel = ln(Pdiesel)
* Make new crop year time variable
gen cropdate = cropyear*100+m 
save "Data/WheatinCropTime.dta", replace

*** Corn - Make a version of the data that shifts everything to "harvest time"
** First, need daily versions of oil cars, total traffic, weather and diesel prices
* Oil cars
use "Data/CrudeOilGrain.dta", clear
* Relvant years
keep if WYEAR >= 2010
keep if WYEAR <= 2015
* ND shipments
keep if O_ST == "ND"
* Akshaya's definitions for oil
replace STCC5_Title = "Oil" if STCC5 == 13111 | STCC5 == 49101 | STCC5 == 49151
* Export temp file with Oil cars
keep if STCC5_Title == "Oil"
gen date = mdy(WMO, WDAY, WYEAR)
format date %td
* Convert to 1000s of cars
replace Cars = Cars/1000
rename Cars oilcars_stb
keep date WYEAR WMO oilcars_stb
*** Create variable for crop years
gen year = WYEAR
gen month = WMO
gen cropyear = 0
replace cropyear = 2009 if year == 2010 & month <= 8
replace cropyear = 2010 if year == 2010 & month >= 9
replace cropyear = 2010 if year == 2011 & month <= 8
replace cropyear = 2011 if year == 2011 & month >= 9
replace cropyear = 2011 if year == 2012 & month <= 8
replace cropyear = 2012 if year == 2012 & month >= 9
replace cropyear = 2012 if year == 2013 & month <= 8
replace cropyear = 2013 if year == 2013 & month >= 9
replace cropyear = 2013 if year == 2014 & month <= 8
replace cropyear = 2014 if year == 2014 & month >= 9
replace cropyear = 2014 if year == 2015 & month <= 8
replace cropyear = 2015 if year == 2015 & month >= 9
* Loop over years and assign most post harvest
gen m = .
foreach y in 2009 2010 2011 2012 2013 2014 {
foreach m in  1 2 3 4 5 6 7 8 9 10 11 12 13 {
*foreach y in 2010  {
*foreach m in 1  {
if `y' == 2009 {
local harvest = $h2009c
} 
if `y' == 2010 {
local harvest = $h2010c
} 
if `y' == 2011 {
local harvest = $h2011c
}
if `y' == 2012 {
local harvest = $h2012c
}
if `y' == 2013 {
local harvest = $h2013c
}
if `y' == 2014 {
local harvest = $h2014c
}
replace m = `m' if date > `harvest'+(`m'-1)*28 & date <= `harvest'+(`m')*28 & cropyear == `y' 
}
}
sort cropyear m
drop if m == .
collapse (sum) oilcars_stb, by(cropyear m)
save "Data/TempOilCarsCropYearMonth_Corn.dta", replace
* Total traffic
use "/Users/Jon/Dropbox/Rail and Oil/Data/DailyCarloads.dta", clear
keep if ORR_Alpha == "BNSF" | ORR_Alpha == "CPUS"
keep if WYEAR >= 2010
keep if WYEAR <= 2015
sort WYEAR WMO WDAY
collapse (sum) TotTraffic, by(WYEAR WMO WDAY)
save "/Users/Jon/Dropbox/Rail and Oil/Data/DailyCarloadsBNSF&CP.dta", replace
gen date = mdy(WMO, WDAY, WYEAR)
*** Create variable for crop years
gen year = WYEAR
gen month = WMO
gen cropyear = 0
replace cropyear = 2009 if year == 2010 & month <= 8
replace cropyear = 2010 if year == 2010 & month >= 9
replace cropyear = 2010 if year == 2011 & month <= 8
replace cropyear = 2011 if year == 2011 & month >= 9
replace cropyear = 2011 if year == 2012 & month <= 8
replace cropyear = 2012 if year == 2012 & month >= 9
replace cropyear = 2012 if year == 2013 & month <= 8
replace cropyear = 2013 if year == 2013 & month >= 9
replace cropyear = 2013 if year == 2014 & month <= 8
replace cropyear = 2014 if year == 2014 & month >= 9
replace cropyear = 2014 if year == 2015 & month <= 8
replace cropyear = 2015 if year == 2015 & month >= 9
* Loop over years and assign most post harvest
gen m = .
foreach y in 2009 2010 2011 2012 2013 2014 {
foreach m in  1 2 3 4 5 6 7 8 9 10 11 12 13 {
*foreach y in 2010  {
*foreach m in 1  {
if `y' == 2009 {
local harvest = $h2009c
} 
if `y' == 2010 {
local harvest = $h2010c
} 
if `y' == 2011 {
local harvest = $h2011c
}
if `y' == 2012 {
local harvest = $h2012c
}
if `y' == 2013 {
local harvest = $h2013c
}
if `y' == 2014 {
local harvest = $h2014c
}
replace m = `m' if date > `harvest'+(`m'-1)*28 & date <= `harvest'+(`m')*28 & cropyear == `y' 
}
}
sort cropyear m
drop if m == .
collapse (sum) TotTraffic, by(cropyear m)
save "Data/TotTrafficCropYearMonth_Corn.dta", replace
* Temperature
insheet using "Data/ND_weather.csv", comma clear
keep station_name date tmin tmax
save "Data/Temp.dta", replace
insheet using "Data/IAND_weather.csv", comma clear
keep station_name date tmin tmax
save "Data/Temp2.dta", replace
insheet using "Data/MTSDMN_weather.csv", comma clear
keep station_name date tmin tmax
append using "Data/Temp.dta"
append using "Data/Temp2.dta"
gen O_ST = ""
replace O_ST = "MT" if station_name == "HELENA AIRPORT ASOS MT US"
replace O_ST = "MN" if station_name == "MINNEAPOLIS CRYSTAL AIRPORT MN US"
replace O_ST = "SD" if station_name == "PIERRE REGIONAL AIRPORT SD US"
replace O_ST = "ND" if station_name == "BISMARCK 5 NNW, ND US"
replace O_ST = "IA" if station_name == "DES MOINES INTERNATIONAL AIRPORT IA US"
replace O_ST = "NE" if station_name == "LINCOLN AIRPORT NE US"
* Keep ND, SD, MN, MT
gen keep = 0
replace keep = 1 if O_ST == "ND"
replace keep = 1 if O_ST == "SD"
replace keep = 1 if O_ST == "MT"
replace keep = 1 if O_ST == "MN"
replace keep = 1 if O_ST == "NE"
replace keep = 1 if O_ST == "IA"
keep if keep == 1
gen temp = string(date, "%9.0f")
gen year = real(substr(temp,1,4))
gen month = real(substr(temp,5,2))
gen day = real(substr(temp,7,2))
drop date
gen date = mdy(month, day, year)
format date %td
gen cropyear = .
replace cropyear = 2009 if year == 2010 & month <= 8
replace cropyear = 2010 if year == 2010 & month >= 9
replace cropyear = 2010 if year == 2011 & month <= 8
replace cropyear = 2011 if year == 2011 & month >= 9
replace cropyear = 2011 if year == 2012 & month <= 8
replace cropyear = 2012 if year == 2012 & month >= 9
replace cropyear = 2012 if year == 2013 & month <= 8
replace cropyear = 2013 if year == 2013 & month >= 9
replace cropyear = 2013 if year == 2014 & month <= 8
replace cropyear = 2014 if year == 2014 & month >= 9
replace cropyear = 2014 if year == 2015 & month <= 8
replace cropyear = 2015 if year == 2015 & month >= 9
* Loop over years and assign most post harvest
gen m = .
foreach y in 2009 2010 2011 2012 2013 2014 {
foreach m in  1 2 3 4 5 6 7 8 9 10 11 12 13 {
*foreach y in 2010  {
*foreach m in 1  {
if `y' == 2009 {
local harvest = $h2009c
} 
if `y' == 2010 {
local harvest = $h2010c
} 
if `y' == 2011 {
local harvest = $h2011c
}
if `y' == 2012 {
local harvest = $h2012c
}
if `y' == 2013 {
local harvest = $h2013c
}
if `y' == 2014 {
local harvest = $h2014c
}
replace m = `m' if date > `harvest'+(`m'-1)*28 & date <= `harvest'+(`m')*28 & cropyear == `y' 
}
}
sort O_ST cropyear m
drop if m == .
collapse (mean) tmax tmin, by(O_ST cropyear m)
save "Data/WeatherCropYear_Corn.dta", replace
* Diesel prices (weekly)
use "Data/Diesel_prices.dta", clear
gen WYEAR = year(Date)
gen WWEEK = week(Date)
gen WMO = month(Date)
gen month = WMO
sort WYEAR WWEEK
rename WeeklyMidwestNo2DieselRetai Pdiesel
rename Date date
keep if WYEAR >= 2010
keep if WYEAR <= 2015
gen year = WYEAR
* Create crop year variable
gen cropyear = .
replace cropyear = 2009 if year == 2010 & month <= 8
replace cropyear = 2010 if year == 2010 & month >= 9
replace cropyear = 2010 if year == 2011 & month <= 8
replace cropyear = 2011 if year == 2011 & month >= 9
replace cropyear = 2011 if year == 2012 & month <= 8
replace cropyear = 2012 if year == 2012 & month >= 9
replace cropyear = 2012 if year == 2013 & month <= 8
replace cropyear = 2013 if year == 2013 & month >= 9
replace cropyear = 2013 if year == 2014 & month <= 8
replace cropyear = 2014 if year == 2014 & month >= 9
replace cropyear = 2014 if year == 2015 & month <= 8
replace cropyear = 2015 if year == 2015 & month >= 9
* Loop over years and assign most post harvest
gen m = .
foreach y in 2009 2010 2011 2012 2013 2014 {
foreach m in  1 2 3 4 5 6 7 8 9 10 11 12 13 {
*foreach y in 2010  {
*foreach m in 1  {
if `y' == 2009 {
local harvest = $h2009c
} 
if `y' == 2010 {
local harvest = $h2010c
} 
if `y' == 2011 {
local harvest = $h2011c
}
if `y' == 2012 {
local harvest = $h2012c
}
if `y' == 2013 {
local harvest = $h2013c
}
if `y' == 2014 {
local harvest = $h2014c
}
replace m = `m' if date > `harvest'+(`m'-1)*28 & date <= `harvest'+(`m')*28 & cropyear == `y' 
}
}
sort cropyear m
drop if m == .
collapse (mean) Pdiesel, by(cropyear m)
save "Data/DieselCropYear_Corn.dta", replace
** Combine with wheat data for treated states
use "Data/CrudeOilGrain.dta", clear
gen date = mdy(WMO, WDAY, WYEAR)
format date %td
order date
** Focus on wheat
keep if STCC5 == 1132
* Restrict to years for which we have data
keep if WYEAR >= 2010
keep if WYEAR <= 2015
* Keep ND, SD, MN, MT
gen keep = 0
replace keep = 1 if O_ST == "NE"
replace keep = 1 if O_ST == "IA"
replace keep = 1 if O_ST == "ND"
replace keep = 1 if O_ST == "SD"
replace keep = 1 if O_ST == "MT"
replace keep = 1 if O_ST == "MN"
keep if keep == 1
* Make revenue per ton variable
gen rpt = revenue/(tons*33)
format date %td
*** Create variable for crop years
gen year = WYEAR
gen month = WMO
gen cropyear = 0
replace cropyear = 2009 if year == 2010 & month <= 8
replace cropyear = 2010 if year == 2010 & month >= 9
replace cropyear = 2010 if year == 2011 & month <= 8
replace cropyear = 2011 if year == 2011 & month >= 9
replace cropyear = 2011 if year == 2012 & month <= 8
replace cropyear = 2012 if year == 2012 & month >= 9
replace cropyear = 2012 if year == 2013 & month <= 8
replace cropyear = 2013 if year == 2013 & month >= 9
replace cropyear = 2013 if year == 2014 & month <= 8
replace cropyear = 2014 if year == 2014 & month >= 9
replace cropyear = 2014 if year == 2015 & month <= 8
replace cropyear = 2015 if year == 2015 & month >= 9
** Create harest date variables
gen harvest = .
replace harvest = $h2009c if cropyear == 2009
replace harvest = $h2010c if cropyear == 2010
replace harvest = $h2011c if cropyear == 2011
replace harvest = $h2012c if cropyear == 2012
replace harvest = $h2013c if cropyear == 2013
replace harvest = $h2014c if cropyear == 2014
* Loop over years and assign most post harvest
foreach m in  1 2 3 4 5 6 7 8 9 10 11 12 13 {
gen m`m' = 0
}
gen m = .
foreach y in 2009 2010 2011 2012 2013 2014 {
foreach m in  1 2 3 4 5 6 7 8 9 10 11 12 13 {
*foreach y in 2010  {
*foreach m in 1  {
if `y' == 2009 {
local harvest = $h2009c
} 
if `y' == 2010 {
local harvest = $h2010c
} 
if `y' == 2011 {
local harvest = $h2011c
}
if `y' == 2012 {
local harvest = $h2012c
}
if `y' == 2013 {
local harvest = $h2013c
}
if `y' == 2014 {
local harvest = $h2014c
}
replace m`m' = 1 if date > `harvest'+(`m'-1)*28 & date <= `harvest'+(`m')*28 & cropyear == `y' 
replace m = `m' if date > `harvest'+(`m'-1)*28 & date <= `harvest'+(`m')*28 & cropyear == `y' 
}
}
* Collapse to cropyear m, where m is harvest month
sort O_FIPS cropyear m
collapse (mean) artm rpt (sum) U_Cars (first) O_ST [iweight = Exp_Factor_Th], by(O_FIPS cropyear m)
* Merge in rail traffic data
sort cropyear m
merge m:1 cropyear m using "Data/TotTrafficCropYearMonth_Corn.dta", nogen
* Merge in monthly oil cars
sort cropyear m
merge m:1 cropyear m using "Data/TempOilCarsCropYearMonth_Corn.dta", nogen
* Merge in diesel prices
sort cropyear m
merge m:1 cropyear m using "Data/DieselCropYear_Corn.dta", nogen
* Merge in weather
sort O_ST cropyear m
merge m:1 O_ST cropyear m using "Data/WeatherCropYear_Corn.dta", nogen
drop if m == .
drop if O_FIPS == .
*** Merge in production (monthly from NASS survey)
sort O_FIPS cropyear
merge m:1 O_FIPS cropyear using "Data/CntyCornProd.dta", 
drop if _merge == 2
drop _merge
*** Generate log variables
gen production = corn
gen lproduction = ln(corn+1)
gen lTotTraff = ln(TotTraffic)
gen lcars = ln(U_Cars+1)
gen loilcars = ln(oilcars_stb)
gen lartm = ln(artm)
gen lrpt = ln(rpt)
gen lpdiesel = ln(Pdiesel)
* Make new crop year time variable
gen cropdate = cropyear*100+m 
save "Data/CorninCropTime.dta", replace

*** Soy- Make a version of the data that shifts everything to "harvest time"
** First, need daily versions of oil cars, total traffic, weather and diesel prices
* Oil cars
use "Data/CrudeOilGrain.dta", clear
* Relvant years
keep if WYEAR >= 2010
keep if WYEAR <= 2015
* ND shipments
keep if O_ST == "ND"
* Akshaya's definitions for oil
replace STCC5_Title = "Oil" if STCC5 == 13111 | STCC5 == 49101 | STCC5 == 49151
* Export temp file with Oil cars
keep if STCC5_Title == "Oil"
gen date = mdy(WMO, WDAY, WYEAR)
format date %td
* Convert to 1000s of cars
replace Cars = Cars/1000
rename Cars oilcars_stb
keep date WYEAR WMO oilcars_stb
*** Create variable for crop years
gen year = WYEAR
gen month = WMO
gen cropyear = 0
replace cropyear = 2009 if year == 2010 & month <= 8
replace cropyear = 2010 if year == 2010 & month >= 9
replace cropyear = 2010 if year == 2011 & month <= 8
replace cropyear = 2011 if year == 2011 & month >= 9
replace cropyear = 2011 if year == 2012 & month <= 8
replace cropyear = 2012 if year == 2012 & month >= 9
replace cropyear = 2012 if year == 2013 & month <= 8
replace cropyear = 2013 if year == 2013 & month >= 9
replace cropyear = 2013 if year == 2014 & month <= 8
replace cropyear = 2014 if year == 2014 & month >= 9
replace cropyear = 2014 if year == 2015 & month <= 8
replace cropyear = 2015 if year == 2015 & month >= 9
* Loop over years and assign most post harvest
gen m = .
foreach y in 2009 2010 2011 2012 2013 2014 {
foreach m in  1 2 3 4 5 6 7 8 9 10 11 12 13 {
*foreach y in 2010  {
*foreach m in 1  {
if `y' == 2009 {
local harvest = $h2009s
} 
if `y' == 2010 {
local harvest = $h2010s
} 
if `y' == 2011 {
local harvest = $h2011s
}
if `y' == 2012 {
local harvest = $h2012s
}
if `y' == 2013 {
local harvest = $h2013s
}
if `y' == 2014 {
local harvest = $h2014s
}
replace m = `m' if date > `harvest'+(`m'-1)*28 & date <= `harvest'+(`m')*28 & cropyear == `y' 
}
}
sort cropyear m
drop if m == .
collapse (sum) oilcars_stb, by(cropyear m)
save "Data/TempOilCarsCropYearMonth_Soy.dta", replace
* Total traffic
use "/Users/Jon/Dropbox/Rail and Oil/Data/DailyCarloads.dta", clear
keep if ORR_Alpha == "BNSF" | ORR_Alpha == "CPUS"
keep if WYEAR >= 2010
keep if WYEAR <= 2015
sort WYEAR WMO WDAY
collapse (sum) TotTraffic, by(WYEAR WMO WDAY)
save "/Users/Jon/Dropbox/Rail and Oil/Data/DailyCarloadsBNSF&CP.dta", replace
gen date = mdy(WMO, WDAY, WYEAR)
*** Create variable for crop years
gen year = WYEAR
gen month = WMO
gen cropyear = 0
replace cropyear = 2009 if year == 2010 & month <= 8
replace cropyear = 2010 if year == 2010 & month >= 9
replace cropyear = 2010 if year == 2011 & month <= 8
replace cropyear = 2011 if year == 2011 & month >= 9
replace cropyear = 2011 if year == 2012 & month <= 8
replace cropyear = 2012 if year == 2012 & month >= 9
replace cropyear = 2012 if year == 2013 & month <= 8
replace cropyear = 2013 if year == 2013 & month >= 9
replace cropyear = 2013 if year == 2014 & month <= 8
replace cropyear = 2014 if year == 2014 & month >= 9
replace cropyear = 2014 if year == 2015 & month <= 8
replace cropyear = 2015 if year == 2015 & month >= 9
* Loop over years and assign most post harvest
gen m = .
foreach y in 2009 2010 2011 2012 2013 2014 {
foreach m in  1 2 3 4 5 6 7 8 9 10 11 12 13 {
*foreach y in 2010  {
*foreach m in 1  {
if `y' == 2009 {
local harvest = $h2009s
} 
if `y' == 2010 {
local harvest = $h2010s
} 
if `y' == 2011 {
local harvest = $h2011s
}
if `y' == 2012 {
local harvest = $h2012s
}
if `y' == 2013 {
local harvest = $h2013s
}
if `y' == 2014 {
local harvest = $h2014s
}
replace m = `m' if date > `harvest'+(`m'-1)*28 & date <= `harvest'+(`m')*28 & cropyear == `y' 
}
}
sort cropyear m
drop if m == .
collapse (sum) TotTraffic, by(cropyear m)
save "Data/TotTrafficCropYearMonth_Soy.dta", replace
* Temperature
insheet using "Data/ND_weather.csv", comma clear
keep station_name date tmin tmax
save "Data/Temp.dta", replace
insheet using "Data/IAND_weather.csv", comma clear
keep station_name date tmin tmax
save "Data/Temp2.dta", replace
insheet using "Data/MTSDMN_weather.csv", comma clear
keep station_name date tmin tmax
append using "Data/Temp.dta"
append using "Data/Temp2.dta"
gen O_ST = ""
replace O_ST = "MT" if station_name == "HELENA AIRPORT ASOS MT US"
replace O_ST = "MN" if station_name == "MINNEAPOLIS CRYSTAL AIRPORT MN US"
replace O_ST = "SD" if station_name == "PIERRE REGIONAL AIRPORT SD US"
replace O_ST = "ND" if station_name == "BISMARCK 5 NNW, ND US"
replace O_ST = "IA" if station_name == "DES MOINES INTERNATIONAL AIRPORT IA US"
replace O_ST = "NE" if station_name == "LINCOLN AIRPORT NE US"
* Keep ND, SD, MN, MT
gen keep = 0
replace keep = 1 if O_ST == "ND"
replace keep = 1 if O_ST == "SD"
replace keep = 1 if O_ST == "MT"
replace keep = 1 if O_ST == "MN"
replace keep = 1 if O_ST == "IA"
replace keep = 1 if O_ST == "NE"
keep if keep == 1
gen temp = string(date, "%9.0f")
gen year = real(substr(temp,1,4))
gen month = real(substr(temp,5,2))
gen day = real(substr(temp,7,2))
drop date
gen date = mdy(month, day, year)
format date %td
gen cropyear = .
replace cropyear = 2009 if year == 2010 & month <= 8
replace cropyear = 2010 if year == 2010 & month >= 9
replace cropyear = 2010 if year == 2011 & month <= 8
replace cropyear = 2011 if year == 2011 & month >= 9
replace cropyear = 2011 if year == 2012 & month <= 8
replace cropyear = 2012 if year == 2012 & month >= 9
replace cropyear = 2012 if year == 2013 & month <= 8
replace cropyear = 2013 if year == 2013 & month >= 9
replace cropyear = 2013 if year == 2014 & month <= 8
replace cropyear = 2014 if year == 2014 & month >= 9
replace cropyear = 2014 if year == 2015 & month <= 8
replace cropyear = 2015 if year == 2015 & month >= 9
* Loop over years and assign most post harvest
gen m = .
foreach y in 2009 2010 2011 2012 2013 2014 {
foreach m in  1 2 3 4 5 6 7 8 9 10 11 12 13 {
*foreach y in 2010  {
*foreach m in 1  {
if `y' == 2009 {
local harvest = $h2009s
} 
if `y' == 2010 {
local harvest = $h2010s
} 
if `y' == 2011 {
local harvest = $h2011s
}
if `y' == 2012 {
local harvest = $h2012s
}
if `y' == 2013 {
local harvest = $h2013s
}
if `y' == 2014 {
local harvest = $h2014s
}
replace m = `m' if date > `harvest'+(`m'-1)*28 & date <= `harvest'+(`m')*28 & cropyear == `y' 
}
}
sort O_ST cropyear m
drop if m == .
collapse (mean) tmax tmin, by(O_ST cropyear m)
save "Data/WeatherCropYear_Soy.dta", replace
* Diesel prices (weekly)
use "Data/Diesel_prices.dta", clear
gen WYEAR = year(Date)
gen WWEEK = week(Date)
gen WMO = month(Date)
gen month = WMO
sort WYEAR WWEEK
rename WeeklyMidwestNo2DieselRetai Pdiesel
rename Date date
keep if WYEAR >= 2010
keep if WYEAR <= 2015
gen year = WYEAR
* Create crop year variable
gen cropyear = .
replace cropyear = 2009 if year == 2010 & month <= 8
replace cropyear = 2010 if year == 2010 & month >= 9
replace cropyear = 2010 if year == 2011 & month <= 8
replace cropyear = 2011 if year == 2011 & month >= 9
replace cropyear = 2011 if year == 2012 & month <= 8
replace cropyear = 2012 if year == 2012 & month >= 9
replace cropyear = 2012 if year == 2013 & month <= 8
replace cropyear = 2013 if year == 2013 & month >= 9
replace cropyear = 2013 if year == 2014 & month <= 8
replace cropyear = 2014 if year == 2014 & month >= 9
replace cropyear = 2014 if year == 2015 & month <= 8
replace cropyear = 2015 if year == 2015 & month >= 9
* Loop over years and assign most post harvest
gen m = .
foreach y in 2009 2010 2011 2012 2013 2014 {
foreach m in  1 2 3 4 5 6 7 8 9 10 11 12 13 {
*foreach y in 2010  {
*foreach m in 1  {
if `y' == 2009 {
local harvest = $h2009s
} 
if `y' == 2010 {
local harvest = $h2010s
} 
if `y' == 2011 {
local harvest = $h2011s
}
if `y' == 2012 {
local harvest = $h2012s
}
if `y' == 2013 {
local harvest = $h2013s
}
if `y' == 2014 {
local harvest = $h2014s
}
replace m = `m' if date > `harvest'+(`m'-1)*28 & date <= `harvest'+(`m')*28 & cropyear == `y' 
}
}
sort cropyear m
drop if m == .
collapse (mean) Pdiesel, by(cropyear m)
save "Data/DieselCropYear_Soy.dta", replace
** Combine with wheat data for treated states
use "Data/CrudeOilGrain.dta", clear
gen date = mdy(WMO, WDAY, WYEAR)
format date %td
order date
** Focus on wheat
keep if STCC5 == 1132
* Restrict to years for which we have data
keep if WYEAR >= 2010
keep if WYEAR <= 2015
* Keep ND, SD, MN, MT
gen keep = 0
replace keep = 1 if O_ST == "NE"
replace keep = 1 if O_ST == "IA"
replace keep = 1 if O_ST == "ND"
replace keep = 1 if O_ST == "SD"
replace keep = 1 if O_ST == "MT"
replace keep = 1 if O_ST == "MN"
keep if keep == 1
* Make revenue per ton variable
gen rpt = revenue/(tons*33)
format date %td
*** Create variable for crop years
gen year = WYEAR
gen month = WMO
gen cropyear = 0
replace cropyear = 2009 if year == 2010 & month <= 8
replace cropyear = 2010 if year == 2010 & month >= 9
replace cropyear = 2010 if year == 2011 & month <= 8
replace cropyear = 2011 if year == 2011 & month >= 9
replace cropyear = 2011 if year == 2012 & month <= 8
replace cropyear = 2012 if year == 2012 & month >= 9
replace cropyear = 2012 if year == 2013 & month <= 8
replace cropyear = 2013 if year == 2013 & month >= 9
replace cropyear = 2013 if year == 2014 & month <= 8
replace cropyear = 2014 if year == 2014 & month >= 9
replace cropyear = 2014 if year == 2015 & month <= 8
replace cropyear = 2015 if year == 2015 & month >= 9
** Create harest date variables
gen harvest = .
replace harvest = $h2009s if cropyear == 2009
replace harvest = $h2010s if cropyear == 2010
replace harvest = $h2011s if cropyear == 2011
replace harvest = $h2012s if cropyear == 2012
replace harvest = $h2013s if cropyear == 2013
replace harvest = $h2014s if cropyear == 2014
* Loop over years and assign most post harvest
foreach m in  1 2 3 4 5 6 7 8 9 10 11 12 13 {
gen m`m' = 0
}
gen m = .
foreach y in 2009 2010 2011 2012 2013 2014 {
foreach m in  1 2 3 4 5 6 7 8 9 10 11 12 13 {
*foreach y in 2010  {
*foreach m in 1  {
if `y' == 2009 {
local harvest = $h2009s
} 
if `y' == 2010 {
local harvest = $h2010s
} 
if `y' == 2011 {
local harvest = $h2011s
}
if `y' == 2012 {
local harvest = $h2012s
}
if `y' == 2013 {
local harvest = $h2013s
}
if `y' == 2014 {
local harvest = $h2014s
}
replace m`m' = 1 if date > `harvest'+(`m'-1)*28 & date <= `harvest'+(`m')*28 & cropyear == `y' 
replace m = `m' if date > `harvest'+(`m'-1)*28 & date <= `harvest'+(`m')*28 & cropyear == `y' 
}
}
* Collapse to cropyear m, where m is harvest month
sort O_FIPS cropyear m
collapse (mean) artm rpt (sum) U_Cars (first) O_ST [iweight = Exp_Factor_Th], by(O_FIPS cropyear m)
* Merge in rail traffic data
sort cropyear m
merge m:1 cropyear m using "Data/TotTrafficCropYearMonth_Soy.dta", nogen
* Merge in monthly oil cars
sort cropyear m
merge m:1 cropyear m using "Data/TempOilCarsCropYearMonth_Soy.dta", nogen
* Merge in diesel prices
sort cropyear m
merge m:1 cropyear m using "Data/DieselCropYear_Soy.dta", nogen
* Merge in weather
sort O_ST cropyear m
merge m:1 O_ST cropyear m using "Data/WeatherCropYear_Soy.dta", nogen
drop if m == .
drop if O_FIPS == .
*** Merge in production (monthly from NASS survey)
sort O_FIPS cropyear
merge m:1 O_FIPS cropyear using "Data/CntySoyProd.dta", 
drop if _merge == 2
drop _merge
*** Generate log variables
gen production = soy
gen lproduction = ln(soy+1)
gen lTotTraff = ln(TotTraffic)
gen lcars = ln(U_Cars+1)
gen loilcars = ln(oilcars_stb)
gen lartm = ln(artm)
gen lrpt = ln(rpt)
gen lpdiesel = ln(Pdiesel)
* Make new crop year time variable
gen cropdate = cropyear*100+m 
save "Data/SoyinCropTime.dta", replace

* Make data for probit regressions of east versus west coast shipments
use "Data/CrudeOilGrain.dta", clear
gen date = mdy(WMO, WDAY, WYEAR)
format date %td
order date
** Focus on wheat
keep if STCC5 == 1137
* Restrict to years for which we have data
keep if WYEAR >= 2010
keep if WYEAR <= 2015
* Keep ND, SD, MN, MT
gen keep = 0
replace keep = 1 if O_ST == "ND"
replace keep = 1 if O_ST == "SD"
replace keep = 1 if O_ST == "MT"
replace keep = 1 if O_ST == "MN"
keep if keep == 1
* Make revenue per ton variable
gen rpt = revenue/(tons*33)
format date %td
*** Create variable for crop years
gen year = WYEAR
gen month = WMO
gen cropyear = 0
replace cropyear = 2009 if year == 2010 & month <= 8
replace cropyear = 2010 if year == 2010 & month >= 9
replace cropyear = 2010 if year == 2011 & month <= 8
replace cropyear = 2011 if year == 2011 & month >= 9
replace cropyear = 2011 if year == 2012 & month <= 8
replace cropyear = 2012 if year == 2012 & month >= 9
replace cropyear = 2012 if year == 2013 & month <= 8
replace cropyear = 2013 if year == 2013 & month >= 9
replace cropyear = 2013 if year == 2014 & month <= 8
replace cropyear = 2014 if year == 2014 & month >= 9
replace cropyear = 2014 if year == 2015 & month <= 8
replace cropyear = 2015 if year == 2015 & month >= 9
** Create harest date variables
gen harvest = .
replace harvest = $h2009w if cropyear == 2009
replace harvest = $h2010w if cropyear == 2010
replace harvest = $h2011w if cropyear == 2011
replace harvest = $h2012w if cropyear == 2012
replace harvest = $h2013w if cropyear == 2013
replace harvest = $h2014w if cropyear == 2014
* Loop over years and assign most post harvest
foreach m in  1 2 3 4 5 6 7 8 9 10 11 12 13 {
gen m`m' = 0
}
gen m = .
foreach y in 2009 2010 2011 2012 2013 2014 {
foreach m in  1 2 3 4 5 6 7 8 9 10 11 12 13 {
*foreach y in 2010  {
*foreach m in 1  {
if `y' == 2009 {
local harvest = $h2009w
} 
if `y' == 2010 {
local harvest = $h2010w
} 
if `y' == 2011 {
local harvest = $h2011w
}
if `y' == 2012 {
local harvest = $h2012w
}
if `y' == 2013 {
local harvest = $h2013w
}
if `y' == 2014 {
local harvest = $h2014w
}
replace m`m' = 1 if date > `harvest'+(`m'-1)*28 & date <= `harvest'+(`m')*28 & cropyear == `y' 
replace m = `m' if date > `harvest'+(`m'-1)*28 & date <= `harvest'+(`m')*28 & cropyear == `y' 
}
}
* Merge in rail traffic data
sort cropyear m
merge m:1 cropyear m using "Data/TotTrafficCropYearMonth_Wheat.dta", nogen
* Merge in monthly oil cars
sort cropyear m
merge m:1 cropyear m using "Data/TempOilCarsCropYearMonth_Wheat.dta", nogen
* Merge in diesel prices
sort cropyear m
merge m:1 cropyear m using "Data/DieselCropYear_Wheat.dta", nogen
* Merge in weather
sort O_ST cropyear m
merge m:1 O_ST cropyear m using "Data/WeatherCropYear_Wheat.dta", nogen
drop if m == .
drop if O_FIPS == .
*** Merge in production (monthly from NASS survey)
sort O_FIPS cropyear
merge m:1 O_FIPS cropyear using "Data/CntyWheatProd.dta", 
drop if _merge == 2
drop _merge
*** Generate log variables
gen lproduction = ln(production+1)
gen lTotTraff = ln(TotTraffic)
gen lcars = ln(U_Cars)
gen loilcars = ln(oilcars_stb)
gen lartm = ln(artm)
gen lrpt = ln(rpt)
gen lpdiesel = ln(Pdiesel)
* Make new crop year time variable
gen cropdate = cropyear*100+m 
* Make variable for west coast destination
gen West = T_ST == "WA" | T_ST == "OR" | T_ST == "CA"
** Merge in latitude and longitude data
sor O_FIPS
merge m:1 O_FIPS using  "Data/FIPSlatlong.dta",
keep if _merge == 3
drop _merge
* Calculate (distance as the crow flies) to Minneapolis
geodist latitude longitude 44.9706756 -93.3315183, gen(MNdist) miles
save "Data/WestProbits.dta", replace

* THE END *
